package de.webcrawler;

import java.io.IOException;
import java.io.InputStream;
import java.util.List;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.Source;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;

import de.preisrobot.service.helper.HttpClientFactory;

/**
 * Small command-line crawler prototype: fetches one page over HTTP and walks
 * every anchor ({@code <a>}) element found in the response body.
 *
 * <p>Link processing is not implemented yet; the anchor loop is a placeholder.
 */
public class Webcrawler {

	/** Page fetched when no URL is passed on the command line. */
	private static final String DEFAULT_SOURCE_URL =
			"http://www.amazon.de/b/ref=sa_menu_desk3?ie=UTF8&node=514699031";

	/**
	 * Fetches the start page and hands its body to {@link #parseStream(InputStream)}.
	 *
	 * @param args optional; if at least one argument is given, {@code args[0]}
	 *             is used as the URL instead of the built-in default
	 * @throws HttpException if the HTTP request fails at the protocol level
	 * @throws IOException   if the HTTP request fails at the transport level
	 */
	public static void main(String[] args) throws HttpException, IOException {

		String sourceUrlString = (args != null && args.length > 0) ? args[0] : DEFAULT_SOURCE_URL;

		HttpClient client = HttpClientFactory.getHttpClient();
		GetMethod method = HttpClientFactory.createGETMethod(sourceUrlString);

		try {
			// Executed inside the try so the connection is released even when
			// the request itself throws; request failures still propagate.
			client.executeMethod(method);

			try {
				InputStream in = method.getResponseBodyAsStream();
				if (in == null) {
					// No response body available, so there is nothing to parse.
					System.err.println("No response body received from " + sourceUrlString);
				} else {
					parseStream(in);
				}
			} catch (Exception e) {
				// Parsing stays best-effort (it does not abort the program),
				// but the failure is reported instead of being dropped.
				System.err.println("Failed to parse " + sourceUrlString + ": " + e);
			}
		} finally {
			method.releaseConnection();
		}
	}

	/**
	 * Parses the given HTML stream and iterates over all anchor elements.
	 *
	 * @param in stream containing the HTML document; must not be {@code null}
	 * @throws IOException if the stream cannot be read
	 */
	private static void parseStream(InputStream in) throws IOException {
		Source source = new Source(in);

		List<Element> elementList = source.getAllElements("a");

		for (Element element : elementList) {
			String attributeValue = element.getAttributeValue("href");

			if (attributeValue == null) {
				// Anchor without an href attribute: nothing to follow.
				continue;
			}

			// TODO: process the link. Earlier drafts intended to keep only
			// hrefs containing "Category" and to skip hrefs that do not start
			// with the shop's base URL; neither filter is active yet.
		}
	}
}
